#!/usr/bin/env python3
# -*- coding: utf-8 -*-
__author__ = '汤小洋'

import jieba
# 导入Counter类，用于统计值出现的次数
from collections import Counter
from wordcloud import WordCloud, STOPWORDS

# Read the novel text. Specify UTF-8 explicitly so the script does not
# depend on the platform default encoding (the original open() had none,
# which fails on e.g. Windows/GBK locales), and use a context manager so
# the file handle is closed even if read() raises.
with open('resources/天龙八部.txt', encoding='utf-8') as f:
    txt = f.read()

# Segment the Chinese text into a flat list of words with jieba.
words = jieba.lcut(txt)

# Drop single-character tokens (mostly particles and punctuation fragments
# that jieba emits); keep only words of length > 1.
new_words = [w for w in words if len(w) > 1]

# Report the 30 most frequent words (the old comment said 20 but the code
# requests 30). most_common() with no argument would return all elements,
# as a list of (word, count) tuples.
counts = Counter(new_words).most_common(30)
print(counts)  # use this output to decide which stop words to block below

# Join words with spaces so WordCloud can tokenize on whitespace
# (WordCloud expects space-separated text, which Chinese lacks natively).
new_txt = ' '.join(new_words)

# Start from wordcloud's built-in English stop words and add the
# high-frequency Chinese filler words identified from the frequency
# report above (same set as before, built in one expression instead of
# fourteen repetitive .add() calls).
stopwords = STOPWORDS | {
    '说道', '自己', '一个', '什么', '不是', '武功', '甚么',
    '一声', '咱们', '不知', '师父', '心中', '知道', '出来',
}

# font_path must name a font containing CJK glyphs (STKAITI here),
# otherwise every Chinese character renders as an empty box.
wc = WordCloud(width=400, height=400, max_font_size=100, max_words=10,
               background_color='white', font_path='STKAITI.TTF',
               stopwords=stopwords).generate(new_txt)

# Save the rendered cloud as a PNG in the working directory.
wc.to_file('词云图.png')
